In [1]:
import pandas as pd
import re
import matplotlib.pyplot as plt

# Load the CSV, skipping the metadata/header rows
df = pd.read_csv('Kanishk_K562_TNFA_Validation_Optical_Results.csv', skiprows=19)

# Select relevant columns and drop rows with missing values
df = df[['Sample', 'Biological Set Name', 'Cq']].dropna()

# Extract numeric time for sorting (e.g., "10 min" -> 10)
def extract_time(s):
    match = re.match(r'(\d+)\s*min', str(s))
    return int(match.group(1)) if match else -1

# Create a new column for sorting
df['time_numeric'] = df['Biological Set Name'].apply(extract_time)

# Get unique Biological Set Names and sort them by extracted time
sorted_time_names = df.drop_duplicates('Biological Set Name').sort_values('time_numeric')['Biological Set Name']

# Pivot the table
pivot = df.pivot(index='Sample', columns='Biological Set Name', values='Cq')

# Reindex columns to ensure correct order
pivot = pivot.reindex(columns=sorted_time_names)

pivot_table=pivot

pivot_table
Out[1]:
Biological Set Name 0 min 10 min 20 min 30 min 40 min 50 min 60 min 70 min 80 min 90 min 100 min 110 min
Sample
ACTB 20.167819 20.057232 20.032432 20.162431 20.184199 20.323662 20.301582 20.242956 20.251312 20.054655 20.240633 20.300083
GAPDH 21.432254 21.114981 21.073946 21.137320 20.904244 21.168053 21.009488 21.048110 20.973592 20.769913 20.765195 20.187977
HPRT1 29.886828 29.958761 30.046877 30.193573 29.970664 30.576978 30.232387 31.233056 30.099213 30.102881 30.227369 30.441267
ICAM1 26.574846 26.325522 25.723978 23.702137 22.447005 22.015139 21.482922 21.353166 20.881415 20.612188 20.579137 20.791574
IER3 26.882741 26.473618 24.288434 23.268375 22.756264 22.959706 23.012005 22.984936 22.782780 22.935748 22.817715 23.383582
IL8 30.369014 30.748940 29.307146 28.438746 27.953410 28.294158 28.263602 28.155412 27.720445 27.881261 27.712730 27.757117
NFKBIA 26.347702 25.626266 23.297582 22.464121 22.000991 22.033389 21.984129 21.982766 21.805209 21.947809 22.016316 22.263624
TNFRSF9 36.513770 35.399690 34.987913 31.495087 30.034275 30.142218 29.225559 28.909810 28.081962 28.056233 28.109982 28.587318
In [2]:
#ACTB has more stable expression. Selecting it for Normalization. Also had Pipetting error for last GAPDH Sample at 110min. Ran out of RT-PCR Mix
#Getting Delta Ct
#Dropping HPRT1 and GAPDH from Analysis
pivot_table = pivot_table.drop(['HPRT1', 'GAPDH'], axis=0)
pivot_table
Out[2]:
Biological Set Name 0 min 10 min 20 min 30 min 40 min 50 min 60 min 70 min 80 min 90 min 100 min 110 min
Sample
ACTB 20.167819 20.057232 20.032432 20.162431 20.184199 20.323662 20.301582 20.242956 20.251312 20.054655 20.240633 20.300083
ICAM1 26.574846 26.325522 25.723978 23.702137 22.447005 22.015139 21.482922 21.353166 20.881415 20.612188 20.579137 20.791574
IER3 26.882741 26.473618 24.288434 23.268375 22.756264 22.959706 23.012005 22.984936 22.782780 22.935748 22.817715 23.383582
IL8 30.369014 30.748940 29.307146 28.438746 27.953410 28.294158 28.263602 28.155412 27.720445 27.881261 27.712730 27.757117
NFKBIA 26.347702 25.626266 23.297582 22.464121 22.000991 22.033389 21.984129 21.982766 21.805209 21.947809 22.016316 22.263624
TNFRSF9 36.513770 35.399690 34.987913 31.495087 30.034275 30.142218 29.225559 28.909810 28.081962 28.056233 28.109982 28.587318
In [3]:
#Getting Delta Ct
delta_ct = pivot_table.sub(pivot_table.loc['ACTB'], axis=1)
delta_ct
Out[3]:
Biological Set Name 0 min 10 min 20 min 30 min 40 min 50 min 60 min 70 min 80 min 90 min 100 min 110 min
Sample
ACTB 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
ICAM1 6.407027 6.268290 5.691546 3.539706 2.262807 1.691477 1.181340 1.110209 0.630103 0.557533 0.338504 0.491491
IER3 6.714922 6.416386 4.256002 3.105945 2.572066 2.636043 2.710423 2.741980 2.531469 2.881093 2.577082 3.083500
IL8 10.201196 10.691708 9.274714 8.276315 7.769211 7.970496 7.962020 7.912455 7.469134 7.826606 7.472096 7.457035
NFKBIA 6.179883 5.569034 3.265150 2.301690 1.816792 1.709727 1.682548 1.739810 1.553897 1.893154 1.775683 1.963542
TNFRSF9 16.345951 15.342458 14.955481 11.332656 9.850076 9.818556 8.923977 8.666854 7.830650 8.001578 7.869348 8.287236
In [4]:
#Setting first timepoint as control
control_delta_ct = delta_ct['0 min']
#Getting Delta Delta Ct
delta_delta_ct = delta_ct.sub(control_delta_ct, axis=0)
delta_delta_ct
Out[4]:
Biological Set Name 0 min 10 min 20 min 30 min 40 min 50 min 60 min 70 min 80 min 90 min 100 min 110 min
Sample
ACTB 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
ICAM1 0.0 -0.138737 -0.715481 -2.867320 -4.144220 -4.715550 -5.225687 -5.296818 -5.776924 -5.849494 -6.068523 -5.915536
IER3 0.0 -0.298537 -2.458920 -3.608977 -4.142857 -4.078879 -4.004499 -3.972942 -4.183453 -3.833829 -4.137840 -3.631422
IL8 0.0 0.490512 -0.926481 -1.924881 -2.431984 -2.230699 -2.239175 -2.288740 -2.732062 -2.374590 -2.729099 -2.744161
NFKBIA 0.0 -0.610849 -2.914733 -3.878193 -4.363090 -4.470156 -4.497335 -4.440073 -4.625986 -4.286728 -4.404200 -4.216341
TNFRSF9 0.0 -1.003493 -1.390470 -5.013295 -6.495875 -6.527395 -7.421974 -7.679097 -8.515300 -8.344372 -8.476603 -8.058715
In [5]:
#Getting Fold Change
fold_change = 2 ** (-delta_delta_ct) 
fold_change #Values have too high a range will plot -delta_delta_ct only
Out[5]:
Biological Set Name 0 min 10 min 20 min 30 min 40 min 50 min 60 min 70 min 80 min 90 min 100 min 110 min
Sample
ACTB 1.0 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
ICAM1 1.0 1.100941 1.642031 7.297086 17.682132 26.273752 37.418690 39.309816 54.831152 57.659786 67.113111 60.360620
IER3 1.0 1.229896 5.498051 12.201422 17.665425 16.899149 16.049978 15.702719 18.169583 14.259277 17.604103 12.392732
IL8 1.0 0.711772 1.900635 3.797054 5.396351 4.693615 4.721271 4.886292 6.644045 5.185883 6.630415 6.699999
NFKBIA 1.0 1.527158 7.540882 14.704572 20.578849 22.164143 22.585663 21.706772 24.692237 19.517933 21.173676 18.588537
TNFRSF9 1.0 2.004848 2.621641 32.296249 90.251244 92.244751 171.489161 204.945572 365.898699 325.017229 356.214517 266.633697
In [6]:
positive_ddct=-delta_delta_ct
positive_ddct #Changing sign
Out[6]:
Biological Set Name 0 min 10 min 20 min 30 min 40 min 50 min 60 min 70 min 80 min 90 min 100 min 110 min
Sample
ACTB -0.0 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000
ICAM1 -0.0 0.138737 0.715481 2.867320 4.144220 4.715550 5.225687 5.296818 5.776924 5.849494 6.068523 5.915536
IER3 -0.0 0.298537 2.458920 3.608977 4.142857 4.078879 4.004499 3.972942 4.183453 3.833829 4.137840 3.631422
IL8 -0.0 -0.490512 0.926481 1.924881 2.431984 2.230699 2.239175 2.288740 2.732062 2.374590 2.729099 2.744161
NFKBIA -0.0 0.610849 2.914733 3.878193 4.363090 4.470156 4.497335 4.440073 4.625986 4.286728 4.404200 4.216341
TNFRSF9 -0.0 1.003493 1.390470 5.013295 6.495875 6.527395 7.421974 7.679097 8.515300 8.344372 8.476603 8.058715
In [7]:
positive_ddct.T.plot(xlabel="Time", ylabel="Log Fold Change")
plt.savefig("qPCR_GenePlots_Log_Fold_Validation.svg", format='svg')

So a Time Delay before the activation of TNFRSF9 and ICAM1 is evident in the qPCR data as well.

In [8]:
positive_ddct.T[["NFKBIA","IL8"]].plot(xlabel="Time", ylabel="Log Fold Change")
plt.savefig("qPCR_GenePlots_Log_Fold_Validation_NFKBIA_IL8.svg", format='svg')
In [9]:
positive_ddct.T[["NFKBIA","ICAM1"]].plot(xlabel="Time", ylabel="Log Fold Change")
plt.savefig("qPCR_GenePlots_Log_Fold_Validation_NFKBIA_ICAM1.svg", format='svg')
In [10]:
positive_ddct.T[["NFKBIA","TNFRSF9"]].plot(xlabel="Time", ylabel="Log Fold Change")
plt.savefig("qPCR_GenePlots_Log_Fold_Validation_NFKBIA_TNFRSF9.svg", format='svg')
In [11]:
positive_ddct.T[["NFKBIA","IER3"]].plot(xlabel="Time", ylabel="Log Fold Change")
plt.savefig("qPCR_GenePlots_Log_Fold_Validation_NFKBIA_IER3.svg", format='svg')
In [12]:
(100/pivot_table).T.plot(xlabel="Time",ylabel="Relative Concentration 100/Cq")
plt.savefig("qPCR_GenePlots_Relative_concentration_100DivCq.svg", format='svg')
In [13]:
scK562_genes=pd.read_csv("Selected_Genes_scK562.csv",index_col=0)
In [14]:
scK562_genes
Out[14]:
0 1 2 3 4 5 6 7 8 9 10 11
ACTB 2.994008 2.930615 2.863599 2.971982 2.928550 2.970020 2.926710 3.000689 2.948781 2.836841 2.892733 3.007348
ICAM1 0.086006 0.082477 0.085802 0.225258 0.443590 0.515901 0.764676 0.888616 0.891994 0.867710 0.750336 0.949493
IER3 0.058151 0.048934 0.210205 0.516017 0.562482 0.531894 0.433471 0.412251 0.382959 0.346042 0.350955 0.289744
IL8 0.106400 0.089052 0.417153 0.731696 1.071399 1.079914 1.080593 1.097727 1.061795 1.109638 0.992169 1.244911
NFKBIA 0.573597 0.612698 1.392816 2.252066 2.594812 2.718964 2.547843 2.437945 2.236336 2.259228 2.101444 2.186365
TNFRSF9 0.014858 0.003576 0.019713 0.028735 0.051805 0.132589 0.175997 0.247497 0.379532 0.342595 0.417288 0.442723
In [15]:
scK562_genes.columns=pivot_table.columns
In [16]:
scK562_genes.index=pivot_table.index
In [17]:
scK562_genes
Out[17]:
Biological Set Name 0 min 10 min 20 min 30 min 40 min 50 min 60 min 70 min 80 min 90 min 100 min 110 min
Sample
ACTB 2.994008 2.930615 2.863599 2.971982 2.928550 2.970020 2.926710 3.000689 2.948781 2.836841 2.892733 3.007348
ICAM1 0.086006 0.082477 0.085802 0.225258 0.443590 0.515901 0.764676 0.888616 0.891994 0.867710 0.750336 0.949493
IER3 0.058151 0.048934 0.210205 0.516017 0.562482 0.531894 0.433471 0.412251 0.382959 0.346042 0.350955 0.289744
IL8 0.106400 0.089052 0.417153 0.731696 1.071399 1.079914 1.080593 1.097727 1.061795 1.109638 0.992169 1.244911
NFKBIA 0.573597 0.612698 1.392816 2.252066 2.594812 2.718964 2.547843 2.437945 2.236336 2.259228 2.101444 2.186365
TNFRSF9 0.014858 0.003576 0.019713 0.028735 0.051805 0.132589 0.175997 0.247497 0.379532 0.342595 0.417288 0.442723
In [18]:
scK562_genes.T.corrwith((100/pivot_table).T)
Out[18]:
Sample
ACTB      -0.605591
ICAM1      0.957025
IER3       0.878033
IL8        0.959505
NFKBIA     0.955798
TNFRSF9    0.839289
dtype: float64
In [19]:
scK562_genes.T.corrwith((pivot_table).T)
Out[19]:
Sample
ACTB       0.604427
ICAM1     -0.946985
IER3      -0.874428
IL8       -0.959311
NFKBIA    -0.951711
TNFRSF9   -0.811246
dtype: float64
In [20]:
100/pivot_table
Out[20]:
Biological Set Name 0 min 10 min 20 min 30 min 40 min 50 min 60 min 70 min 80 min 90 min 100 min 110 min
Sample
ACTB 4.958394 4.985733 4.991905 4.959719 4.954371 4.920373 4.925725 4.939990 4.937952 4.986373 4.940557 4.926088
ICAM1 3.762957 3.798595 3.887424 4.219029 4.454937 4.542329 4.654860 4.683146 4.788948 4.851498 4.859290 4.809641
IER3 3.719859 3.777345 4.117186 4.297679 4.394394 4.355457 4.345558 4.350676 4.389280 4.360006 4.382560 4.276505
IL8 3.292830 3.252145 3.412137 3.516329 3.577381 3.534298 3.538119 3.551715 3.607446 3.586638 3.608450 3.602680
NFKBIA 3.795397 3.902246 4.292291 4.451543 4.545250 4.538566 4.548736 4.549018 4.586060 4.556263 4.542086 4.491632
TNFRSF9 2.738693 2.824883 2.858130 3.175098 3.329529 3.317606 3.421663 3.459033 3.561005 3.564270 3.557455 3.498055
In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

# Data from the first table (expression levels over time for sample 1)
data1 = {
    'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
    'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196, 2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
    'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145, 0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
    'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944, 0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
    'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139, 1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
    'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639, 2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
    'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247, 0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483, 0.41728783, 0.44272283]
}

# Data from the second table (expression levels over time for sample 2)
data2 = {
    'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
    'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373, 4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
    'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329, 4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
    'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457, 4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
    'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298, 3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
    'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566, 4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
    'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606, 3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
}

# Convert to DataFrames
df1 = pd.DataFrame(data1).set_index('Time')
df2 = pd.DataFrame(data2).set_index('Time')

# Plotting gene expression over time for the first dataset
plt.figure(figsize=(12, 6))
for gene in df1.columns:
    plt.plot(df1.index, df1[gene], marker='o', label=gene)
plt.title('Gene Expression Levels Over Time - ChronoSeq')
plt.xlabel('Time')
plt.ylabel('Expression Level')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Plotting gene expression over time for the second dataset
plt.figure(figsize=(12, 6))
for gene in df2.columns:
    plt.plot(df2.index, df2[gene], marker='o', label=gene)
plt.title('Gene Expression Levels Over Time - qPCR')
plt.xlabel('Time')
plt.ylabel('Expression Level')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Calculate Pearson correlation and p-values for each gene
correlations = {}
p_values = {}
genes = df1.columns
for gene in genes:
    # Ensure we are correlating the numerical values
    corr, p_val = pearsonr(df1[gene].astype(float), df2[gene].astype(float))
    correlations[gene] = corr
    p_values[gene] = p_val

# Output correlation and p-values dictionaries
print("Correlations:", correlations)
print("P-values:", p_values)

# Plotting correlation coefficients
plt.figure(figsize=(10, 5))
plt.bar(correlations.keys(), correlations.values(), color='skyblue')
plt.title('Pearson Correlation Coefficients Between Gene Expression Levels')
plt.xlabel('Genes')
plt.ylabel('Correlation Coefficient')
plt.ylim(-1, 1) # Correlation coefficient ranges from -1 to 1
plt.grid(axis='y', linestyle='--')
plt.show()

# Plotting p-values on a logarithmic scale
plt.figure(figsize=(10, 5))
plt.bar(p_values.keys(), p_values.values(), color='salmon')
plt.title('P-values for Correlation Tests Between Gene Expression Levels')
plt.xlabel('Genes')
plt.ylabel('P-value (log scale)')
plt.yscale('log') # Use logarithmic scale for p-values as they can be very small
plt.grid(axis='y', linestyle='--')
plt.show()
Correlations: {'ACTB': -0.6055940421318541, 'ICAM1': 0.9570251356459543, 'IER3': 0.878033279489262, 'IL8': 0.9595046097297201, 'NFKBIA': 0.9557978646892042, 'TNFRSF9': 0.8392894913265406}
P-values: {'ACTB': 0.03690132234849123, 'ICAM1': 1.0738900866252006e-06, 'IER3': 0.00017261060928962815, 'IL8': 8.011961244752918e-07, 'NFKBIA': 1.2336734402499755e-06, 'TNFRSF9': 0.000640405351540273}
In [22]:
correlations
Out[22]:
{'ACTB': -0.6055940421318541,
 'ICAM1': 0.9570251356459543,
 'IER3': 0.878033279489262,
 'IL8': 0.9595046097297201,
 'NFKBIA': 0.9557978646892042,
 'TNFRSF9': 0.8392894913265406}
In [23]:
p_values
Out[23]:
{'ACTB': 0.03690132234849123,
 'ICAM1': 1.0738900866252006e-06,
 'IER3': 0.00017261060928962815,
 'IL8': 8.011961244752918e-07,
 'NFKBIA': 1.2336734402499755e-06,
 'TNFRSF9': 0.000640405351540273}
In [24]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import euclidean
from scipy.signal import correlate
from scipy.stats import pearsonr

# Data from the first table (expression levels over time for sample 1)
data1 = {
    'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
    'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196, 2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
    'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145, 0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
    'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944, 0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
    'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139, 1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
    'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639, 2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
    'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247, 0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483, 0.41728783, 0.44272283]
}

# Data from the second table (expression levels over time for sample 2)
data2 = {
    'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
    'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373, 4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
    'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329, 4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
    'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457, 4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
    'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298, 3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
    'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566, 4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
    'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606, 3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
}

df1 = pd.DataFrame(data1).set_index('Time')
df2 = pd.DataFrame(data2).set_index('Time')

# DTW approximation using Euclidean distance and lagged cross-correlation
# Since dtw package is not available, we will use cross-correlation to approximate similarity and lag

dtw_approx_results = {}
ccf_results = {}

for gene in df1.columns:
    x = df1[gene].values
    y = df2[gene].values

    # Normalize the series
    x_norm = (x - np.mean(x)) / np.std(x)
    y_norm = (y - np.mean(y)) / np.std(y)

    # Cross-correlation
    ccf = correlate(x_norm, y_norm, mode='full')
    lags = np.arange(-len(x_norm) + 1, len(x_norm))
    max_ccf_index = np.argmax(ccf)
    max_ccf = ccf[max_ccf_index]
    lag_at_max_ccf = lags[max_ccf_index]

    # Pearson correlation as a proxy for similarity
    r, p = pearsonr(x, y)

    dtw_approx_results[gene] = {'max_ccf': max_ccf, 'lag': lag_at_max_ccf, 'pearson_r': r, 'pearson_p': p}
    ccf_results[gene] = {'ccf': ccf, 'lags': lags}

dtw_approx_results
Out[24]:
{'ACTB': {'max_ccf': 4.566926973563788,
  'lag': -2,
  'pearson_r': -0.6055940421318541,
  'pearson_p': 0.03690132234849123},
 'ICAM1': {'max_ccf': 11.48430162775145,
  'lag': 0,
  'pearson_r': 0.9570251356459543,
  'pearson_p': 1.0738900866252006e-06},
 'IER3': {'max_ccf': 10.536399353871143,
  'lag': 0,
  'pearson_r': 0.878033279489262,
  'pearson_p': 0.00017261060928962815},
 'IL8': {'max_ccf': 11.51405531675664,
  'lag': 0,
  'pearson_r': 0.9595046097297201,
  'pearson_p': 8.011961244752918e-07},
 'NFKBIA': {'max_ccf': 11.46957437627045,
  'lag': 0,
  'pearson_r': 0.9557978646892042,
  'pearson_p': 1.2336734402499755e-06},
 'TNFRSF9': {'max_ccf': 10.071473895918485,
  'lag': 0,
  'pearson_r': 0.8392894913265406,
  'pearson_p': 0.000640405351540273}}
In [25]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine

# Data from the first table (expression levels over time for sample 1)
data1 = {
    'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
    'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196, 2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
    'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145, 0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
    'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944, 0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
    'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139, 1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
    'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639, 2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
    'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247, 0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483, 0.41728783, 0.44272283]
}

# Data from the second table (expression levels over time for sample 2)
data2 = {
    'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
    'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373, 4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
    'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329, 4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
    'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457, 4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
    'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298, 3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
    'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566, 4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
    'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606, 3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
}

# Convert to DataFrames
df1 = pd.DataFrame(data1).set_index('Time')
df2 = pd.DataFrame(data2).set_index('Time')

# Calculate cosine distance for each gene
cosine_distances = {}
genes = df1.columns

print("Cosine Distances:")
for gene in genes:
    # Extract expression values for the current gene from both datasets
    vector1 = df1[gene].values
    vector2 = df2[gene].values

    # Calculate cosine distance
    # cosine() function returns 1 - cosine_similarity
    # If vectors are identical, distance is 0.
    # If vectors are orthogonal, distance is 1.
    # If vectors are opposite, distance is 2 (for data that can be negative).
    # For positive data like gene expression, distance will be between 0 and 1.
    dist = cosine(vector1, vector2)
    cosine_distances[gene] = dist
    print(f"{gene}: {dist:.6f}")

# You can also store them in a DataFrame for better presentation if needed
# cosine_distances_df = pd.DataFrame(list(cosine_distances.items()), columns=['Gene', 'Cosine_Distance'])
# print("\nCosine Distances DataFrame:")
# print(cosine_distances_df)
Cosine Distances:
ACTB: 0.000219
ICAM1: 0.105884
IER3: 0.076341
IL8: 0.080463
NFKBIA: 0.039885
TNFRSF9: 0.199376
In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine

def create_cosine_distance_bar_chart(cosine_distances_dict, save_filename='cosine_distance_bar_chart.svg'):
    """
    Create a publication-quality bar chart of cosine distances between ChronoSeq and qPCR methods

    Parameters:
    cosine_distances_dict: dict with gene names as keys and cosine distances as values
    save_filename: string, filename to save the plot
    """

    # Set publication style parameters
    plt.rcParams.update({
        'font.size': 12,
        'font.family': 'Arial',
        'axes.linewidth': 1.2,
        'axes.spines.top': False,
        'axes.spines.right': False,
        'xtick.major.size': 4,
        'ytick.major.size': 4,
        'figure.dpi': 300,
        'savefig.dpi': 300,
        'savefig.bbox': 'tight'
    })

    # Gene colors for consistency with other plots
    gene_colors = {
        'ACTB': '#1f77b4',    # Blue - housekeeping
        'ICAM1': '#ff7f0e',   # Orange
        'IER3': '#2ca02c',    # Green
        'IL8': '#d62728',     # Red
        'NFKBIA': '#9467bd',  # Purple
        'TNFRSF9': '#8c564b'  # Brown
    }

    # Extract data for plotting
    genes = list(cosine_distances_dict.keys())
    distances = list(cosine_distances_dict.values())
    colors = [gene_colors[gene] for gene in genes]

    # Create the figure and axis
    fig, ax = plt.subplots(1, 1, figsize=(10, 6))

    # Create the bar chart
    bars = ax.bar(genes, distances, color=colors, alpha=0.8, 
                  edgecolor='black', linewidth=1)

    # Add value labels on top of bars
    for i, (bar, distance) in enumerate(zip(bars, distances)):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + 0.005,
                f'{distance:.3f}', ha='center', va='bottom', 
                fontsize=11, fontweight='bold')

    # Customize the plot
    ax.set_ylabel('Cosine Distance', fontsize=14, fontweight='bold')
    ax.set_xlabel('Gene', fontsize=14, fontweight='bold')
    ax.set_title('Cosine Distance Between ChronoSeq and qPCR\nTemporal Expression Patterns', 
                fontsize=16, fontweight='bold', pad=20)

    # Set y-axis limits with some padding
    max_distance = max(distances)
    ax.set_ylim(0, max_distance * 1.15)

    '''# Add a reference line at 0.1 (optional threshold)
    ax.axhline(y=0.1, color='gray', linestyle='--', alpha=0.5, linewidth=1)
    ax.text(len(genes)-1, 0.1 + 0.01, 'Reference (0.1)', 
            ha='right', va='bottom', fontsize=10, color='gray')'''

    # Add grid for better readability
    ax.grid(True, axis='y', alpha=0.3, linestyle='-', linewidth=0.5)
    ax.set_axisbelow(True)

    # Rotate x-axis labels if needed for better readability
    plt.xticks(rotation=0, ha='center')

    # Adjust layout and save
    plt.tight_layout()
    plt.savefig(save_filename, dpi=300, bbox_inches='tight')
    plt.show()

    return fig, ax

def calculate_and_plot_cosine_distances(chronoseq_data, qpcr_data):
    """
    Calculate cosine distances and create bar chart from raw data

    Parameters:
    chronoseq_data: dict with gene expression data from ChronoSeq
    qpcr_data: dict with gene expression data from qPCR
    """

    # Convert to DataFrames
    df1 = pd.DataFrame(chronoseq_data).set_index('Time')
    df2 = pd.DataFrame(qpcr_data).set_index('Time')

    # Calculate cosine distances
    cosine_distances = {}
    genes = df1.columns

    print("Calculating cosine distances...")
    for gene in genes:
        vector1 = df1[gene].values
        vector2 = df2[gene].values
        dist = cosine(vector1, vector2)
        cosine_distances[gene] = dist
        print(f"{gene}: {dist:.6f}")

    # Create the bar chart
    fig, ax = create_cosine_distance_bar_chart(cosine_distances)

    return cosine_distances, fig, ax

# Example usage with your data
if __name__ == "__main__":

    # Your ChronoSeq data (scRNA-seq pseudobulk)
    chronoseq_data = {
        'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', 
                 '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
        'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196, 
                 2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
        'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145, 
                  0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
        'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944, 
                 0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
        'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139, 
                1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
        'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639, 
                   2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
        'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247, 
                    0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483, 
                    0.41728783, 0.44272283]
    }

    # Your qPCR data (100/Cq values)
    qpcr_data = {
        'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', 
                 '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
        'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373, 
                 4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
        'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329, 
                  4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
        'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457, 
                 4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
        'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298, 
                3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
        'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566, 
                   4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
        'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606, 
                    3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
    }

    # Calculate and plot
    distances, fig, ax = calculate_and_plot_cosine_distances(chronoseq_data, qpcr_data)

    print("\nBar chart created successfully!")
    print("File saved as: cosine_distance_bar_chart.svg")

    # Alternative: If you already have calculated distances
    # precalculated_distances = {
    #     'ACTB': 0.000219,
    #     'ICAM1': 0.105884,
    #     'IER3': 0.076341,
    #     'IL8': 0.080463,
    #     'NFKBIA': 0.039885,
    #     'TNFRSF9': 0.199376
    # }
    # fig, ax = create_cosine_distance_bar_chart(precalculated_distances)
findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans.
findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans.
findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans.
findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans.
Calculating cosine distances...
ACTB: 0.000219
ICAM1: 0.105884
IER3: 0.076341
IL8: 0.080463
NFKBIA: 0.039885
TNFRSF9: 0.199376
Bar chart created successfully!
File saved as: cosine_distance_bar_chart.png
In [44]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine

def create_cosine_similarity_bar_chart(cosine_similarities_dict, save_filename='cosine_similarity_bar_chart.svg'):
    """
    Create a publication-quality bar chart of cosine similarities between ChronoSeq and qPCR methods

    Parameters:
    cosine_similarities_dict: dict with gene names as keys and cosine similarities as values
    save_filename: string, filename to save the plot
    """

    # Set publication style parameters
    plt.rcParams.update({
        'font.size': 12,
        'font.family': 'Arial',
        'axes.linewidth': 1.2,
        'axes.spines.top': False,
        'axes.spines.right': False,
        'xtick.major.size': 4,
        'ytick.major.size': 4,
        'figure.dpi': 300,
        'savefig.dpi': 300,
        'savefig.bbox': 'tight'
    })

    # Gene colors for consistency with other plots
    gene_colors = {
        'ACTB': '#1f77b4',    # Blue - housekeeping
        'ICAM1': '#ff7f0e',   # Orange
        'IER3': '#2ca02c',    # Green
        'IL8': '#d62728',     # Red
        'NFKBIA': '#9467bd',  # Purple
        'TNFRSF9': '#8c564b'  # Brown
    }

    # Extract data for plotting
    genes = list(cosine_similarities_dict.keys())
    similarities = list(cosine_similarities_dict.values())
    colors = [gene_colors[gene] for gene in genes]

    # Create the figure and axis
    fig, ax = plt.subplots(1, 1, figsize=(10, 6))

    # Create the bar chart
    bars = ax.bar(genes, similarities, color=colors, alpha=0.8, 
                  edgecolor='black', linewidth=1)

    # Add value labels on top of bars
    for i, (bar, similarity) in enumerate(zip(bars, similarities)):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + 0.005,
                f'{similarity:.3f}', ha='center', va='bottom', 
                fontsize=11, fontweight='bold')

    # Customize the plot
    ax.set_ylabel('Cosine Similarity', fontsize=14, fontweight='bold')
    ax.set_xlabel('Gene', fontsize=14, fontweight='bold')
    ax.set_title('Cosine Similarity Between ChronoSeq and qPCR\nTemporal Expression Patterns', 
             fontsize=16, fontweight='bold', pad=30)

    # Set y-axis limits for better resolution (focus on the range of data)
    min_similarity = min(similarities)
    max_similarity = max(similarities)
    #y_range = max_similarity - min_similarity
    #ax.set_ylim(max(0, min_similarity - 0.05), min(1.0, max_similarity + 0.02))

    # Add reference lines for interpretation
    ax.axhline(y=0.9, color='red', linestyle='--', alpha=0.6, linewidth=1.5)
    ax.text(len(genes)-0.5, 0.9 + 0.01, 'High Similarity (0.9)', 
            ha='right', va='bottom', fontsize=10, color='red', fontweight='bold')

    ax.axhline(y=0.8, color='orange', linestyle='--', alpha=0.5, linewidth=1)
    ax.text(len(genes)-0.5, 0.78, 'Good Similarity (0.8)', 
            ha='right', va='top', fontsize=10, color='darkorange', fontweight='bold')


    # Add grid for better readability
    ax.grid(True, axis='y', alpha=0.3, linestyle='-', linewidth=0.5)
    ax.set_axisbelow(True)

    # Rotate x-axis labels if needed for better readability
    plt.xticks(rotation=0, ha='center')

    # Add interpretation text box higher up (top center)
    interpretation_text = (
        "Higher bars = Better agreement\n"
        "Range: 0.0 (opposite) to 1.0 (identical)"
    )
    ax.text(0.5, 0.99, interpretation_text, transform=ax.transAxes, 
            fontsize=9, verticalalignment='bottom', horizontalalignment='center',
            bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))


    # Adjust layout and save
    plt.tight_layout()
    plt.savefig(save_filename, format='svg', bbox_inches='tight')
    plt.show()

    return fig, ax

def calculate_and_plot_cosine_similarities(chronoseq_data, qpcr_data):
    """
    Calculate cosine similarities and create bar chart from raw data

    Parameters:
    chronoseq_data: dict with gene expression data from ChronoSeq
    qpcr_data: dict with gene expression data from qPCR
    """

    # Convert to DataFrames
    df1 = pd.DataFrame(chronoseq_data).set_index('Time')
    df2 = pd.DataFrame(qpcr_data).set_index('Time')

    # Calculate cosine distances and convert to similarities
    cosine_similarities = {}
    genes = df1.columns

    print("Calculating cosine similarities...")
    print("=" * 50)
    for gene in genes:
        vector1 = df1[gene].values
        vector2 = df2[gene].values
        distance = cosine(vector1, vector2)
        similarity = 1 - distance  # Convert distance to similarity
        cosine_similarities[gene] = similarity
        print(f"{gene:8}: {similarity:.6f} (distance: {distance:.6f})")

    print(f"\nSummary:")
    print(f"Mean similarity: {np.mean(list(cosine_similarities.values())):.3f}")
    print(f"Std similarity:  {np.std(list(cosine_similarities.values())):.3f}")
    print(f"Min similarity:  {min(cosine_similarities.values()):.3f}")
    print(f"Max similarity:  {max(cosine_similarities.values()):.3f}")

    # Create the bar chart
    fig, ax = create_cosine_similarity_bar_chart(cosine_similarities)

    return cosine_similarities, fig, ax

def convert_distances_to_similarities(cosine_distances_dict):
    """
    Convert cosine distances to cosine similarities

    Parameters:
    cosine_distances_dict: dict with gene names as keys and cosine distances as values

    Returns:
    dict with gene names as keys and cosine similarities as values
    """
    cosine_similarities = {}
    for gene, distance in cosine_distances_dict.items():
        similarity = 1 - distance
        cosine_similarities[gene] = similarity

    return cosine_similarities

# Example usage with your data
if __name__ == "__main__":

    # Your ChronoSeq data (scRNA-seq pseudobulk)
    chronoseq_data = {
        'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', 
                 '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
        'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196, 
                 2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
        'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145, 
                  0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
        'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944, 
                 0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
        'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139, 
                1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
        'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639, 
                   2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
        'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247, 
                    0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483, 
                    0.41728783, 0.44272283]
    }

    # Your qPCR data (100/Cq values)
    qpcr_data = {
        'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', 
                 '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
        'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373, 
                 4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
        'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329, 
                  4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
        'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457, 
                 4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
        'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298, 
                3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
        'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566, 
                   4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
        'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606, 
                    3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
    }

    # Method 1: Calculate from raw data
    similarities, fig, ax = calculate_and_plot_cosine_similarities(chronoseq_data, qpcr_data)

    print("\nSimilarity bar chart created successfully!")
    print("File saved as: cosine_similarity_bar_chart.svg")

    # Method 2: If you already have calculated distances, convert them
    # precalculated_distances = {
    #     'ACTB': 0.000219,
    #     'ICAM1': 0.105884,
    #     'IER3': 0.076341,
    #     'IL8': 0.080463,
    #     'NFKBIA': 0.039885,
    #     'TNFRSF9': 0.199376
    # }
    # similarities = convert_distances_to_similarities(precalculated_distances)
    # fig, ax = create_cosine_similarity_bar_chart(similarities)
Calculating cosine similarities...
==================================================
ACTB    : 0.999781 (distance: 0.000219)
ICAM1   : 0.894116 (distance: 0.105884)
IER3    : 0.923659 (distance: 0.076341)
IL8     : 0.919537 (distance: 0.080463)
NFKBIA  : 0.960115 (distance: 0.039885)
TNFRSF9 : 0.800624 (distance: 0.199376)

Summary:
Mean similarity: 0.916
Std similarity:  0.062
Min similarity:  0.801
Max similarity:  1.000
Similarity bar chart created successfully!
File saved as: cosine_similarity_bar_chart.svg
In [45]:
# Panel D: Combined Mean Expression vs Cosine Similarity
# Single Plot Analysis

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine
from scipy import stats

# Set publication style
plt.rcParams.update({
    'font.size': 12,
    'font.family': 'Arial',
    'axes.linewidth': 1.2,
    'axes.spines.top': False,
    'axes.spines.right': False,
    'xtick.major.size': 4,
    'ytick.major.size': 4,
    'figure.dpi': 100,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight'
})

# Data
chronoseq_data = {
    'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', 
             '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
    'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196, 
             2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
    'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145, 
              0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
    'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944, 
             0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
    'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139, 
            1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
    'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639, 
               2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
    'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247, 
                0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483, 
                0.41728783, 0.44272283]
}

qpcr_data = {
    'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', 
             '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
    'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373, 
             4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
    'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329, 
              4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
    'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457, 
             4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
    'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298, 
            3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
    'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566, 
               4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
    'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606, 
                3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
}

# Gene colors
gene_colors = {
    'ACTB': '#1f77b4',    # Blue - housekeeping
    'ICAM1': '#ff7f0e',   # Orange
    'IER3': '#2ca02c',    # Green
    'IL8': '#d62728',     # Red
    'NFKBIA': '#9467bd',  # Purple
    'TNFRSF9': '#8c564b'  # Brown
}

# Convert to DataFrames
df_chronoseq = pd.DataFrame(chronoseq_data).set_index('Time')
df_qpcr = pd.DataFrame(qpcr_data).set_index('Time')

# Calculate metrics
def calculate_combined_expression_similarity():
    """Calculate cosine similarities and combined mean expression levels"""

    analysis_data = []
    genes = df_chronoseq.columns

    for gene in genes:
        chronoseq_vals = df_chronoseq[gene].values
        qpcr_vals = df_qpcr[gene].values

        # Calculate cosine similarity
        distance = cosine(chronoseq_vals, qpcr_vals)
        similarity = 1 - distance

        # Combined mean expression
        chronoseq_mean = np.mean(chronoseq_vals)
        qpcr_mean = np.mean(qpcr_vals)
        combined_mean = (chronoseq_mean + qpcr_mean) / 2

        analysis_data.append({
            'Gene': gene,
            'Cosine_Similarity': similarity,
            'Combined_Mean_Expression': combined_mean
        })

    return pd.DataFrame(analysis_data)

# Perform analysis
results_df = calculate_combined_expression_similarity()

# Calculate correlation
correlation = results_df['Combined_Mean_Expression'].corr(results_df['Cosine_Similarity'])
r_squared = correlation ** 2

print("Combined Mean Expression vs Cosine Similarity Analysis")
print("=" * 55)
print(results_df.round(4))
print(f"\nCorrelation coefficient (r): {correlation:.3f}")
print(f"R-squared (R²): {r_squared:.3f}")

# Create the plot
fig, ax = plt.subplots(1, 1, figsize=(10, 8))

# Plot points with gene-specific colors and labels
for _, row in results_df.iterrows():
    ax.scatter(row['Combined_Mean_Expression'], row['Cosine_Similarity'], 
               color=gene_colors[row['Gene']], s=120, alpha=0.8,
               edgecolors='black', linewidth=1.5)

    # Add gene labels with slight offset for readability
    ax.annotate(row['Gene'], 
                (row['Combined_Mean_Expression'], row['Cosine_Similarity']),
                xytext=(8, 8), textcoords='offset points', 
                fontsize=11, fontweight='bold',
                bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.7))

# Add trend line
x = results_df['Combined_Mean_Expression']
y = results_df['Cosine_Similarity']
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
x_trend = np.linspace(x.min(), x.max(), 100)
ax.plot(x_trend, p(x_trend), 'k--', alpha=0.8, linewidth=2.5, label='Trend Line')

# Customize the plot
ax.set_xlabel('Combined Mean Expression Level', fontsize=14, fontweight='bold')
ax.set_ylabel('Cosine Similarity', fontsize=14, fontweight='bold')
ax.set_title('Combined Mean Expression vs Cosine Similarity\nChronoSeq & qPCR Method Agreement', 
             fontsize=16, fontweight='bold', pad=20)

# Add correlation info to plot
ax.text(0.05, 0.95, f'r = {correlation:.3f}', 
        transform=ax.transAxes, fontsize=12, fontweight='bold',
        bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.8),
        verticalalignment='top')

# Add grid
ax.grid(True, alpha=0.3, linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)

# Set axis limits with some padding
x_range = x.max() - x.min()
y_range = y.max() - y.min()
ax.set_xlim(x.min() - 0.1*x_range, x.max() + 0.1*x_range)
ax.set_ylim(y.min() - 0.02, y.max() + 0.02)

# Enhance tick labels
ax.tick_params(axis='both', which='major', labelsize=11)

plt.tight_layout()
plt.savefig('combined_expression_similarity.svg', format='svg', bbox_inches='tight')
plt.show()

# Statistical analysis
print("\n" + "="*55)
print("STATISTICAL ANALYSIS")
print("="*55)

# Linear regression details
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)

print(f"Linear regression equation: y = {slope:.4f}x + {intercept:.4f}")
print(f"Correlation coefficient: {r_value:.3f}")
print(f"P-value: {p_value:.2e}")
print(f"Standard error: {std_err:.4f}")
print(f"R-squared: {r_value**2:.3f}")

print("\nINTERPRETATION:")
print("• Strong positive correlation between combined expression and similarity")
print("• Higher expression levels predict better method agreement")
print("• Combined metric explains {:.1f}% of similarity variance".format(r_value**2 * 100))
print("• Relationship is statistically significant (p < 0.05)" if p_value < 0.05 else "• Relationship may not be statistically significant")

# Gene-specific insights
print("\nGENE-SPECIFIC INSIGHTS:")
for _, row in results_df.iterrows():
    gene = row['Gene']
    expr = row['Combined_Mean_Expression']
    sim = row['Cosine_Similarity']
    print(f"{gene:8}: Expression = {expr:.2f}, Similarity = {sim:.3f}")

print("\nKEY FINDINGS:")
highest_expr = results_df.loc[results_df['Combined_Mean_Expression'].idxmax()]
lowest_expr = results_df.loc[results_df['Combined_Mean_Expression'].idxmin()]
print(f"• Highest expression: {highest_expr['Gene']} (similarity: {highest_expr['Cosine_Similarity']:.3f})")
print(f"• Lowest expression: {lowest_expr['Gene']} (similarity: {lowest_expr['Cosine_Similarity']:.3f})")
print(f"• Expression range: {results_df['Combined_Mean_Expression'].min():.2f} - {results_df['Combined_Mean_Expression'].max():.2f}")
print(f"• Similarity range: {results_df['Cosine_Similarity'].min():.3f} - {results_df['Cosine_Similarity'].max():.3f}")
Combined Mean Expression vs Cosine Similarity Analysis
=======================================================
      Gene  Cosine_Similarity  Combined_Mean_Expression
0     ACTB             0.9998                    3.9458
1    ICAM1             0.8941                    2.4944
2     IER3             0.9237                    2.2879
3      IL8             0.9195                    2.1734
4   NFKBIA             0.9601                    3.1964
5  TNFRSF9             0.8006                    1.7318

Correlation coefficient (r): 0.879
R-squared (R²): 0.772
=======================================================
STATISTICAL ANALYSIS
=======================================================
Linear regression equation: y = 0.0742x + 0.7206
Correlation coefficient: 0.879
P-value: 2.12e-02
Standard error: 0.0202
R-squared: 0.772

INTERPRETATION:
• Strong positive correlation between combined expression and similarity
• Higher expression levels predict better method agreement
• Combined metric explains 77.2% of similarity variance
• Relationship is statistically significant (p < 0.05)

GENE-SPECIFIC INSIGHTS:
ACTB    : Expression = 3.95, Similarity = 1.000
ICAM1   : Expression = 2.49, Similarity = 0.894
IER3    : Expression = 2.29, Similarity = 0.924
IL8     : Expression = 2.17, Similarity = 0.920
NFKBIA  : Expression = 3.20, Similarity = 0.960
TNFRSF9 : Expression = 1.73, Similarity = 0.801

KEY FINDINGS:
• Highest expression: ACTB (similarity: 1.000)
• Lowest expression: TNFRSF9 (similarity: 0.801)
• Expression range: 1.73 - 3.95
• Similarity range: 0.801 - 1.000
In [47]:
# Temporal Gene Expression Analysis: ChronoSeq vs qPCR
# Time Series Plots for TNF-α Stimulated K562 Cells

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Set publication style
plt.rcParams.update({
    'font.size': 12,
    'font.family': 'Arial',
    'axes.linewidth': 1.2,
    'axes.spines.top': False,
    'axes.spines.right': False,
    'xtick.major.size': 4,
    'ytick.major.size': 4,
    'figure.dpi': 100,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight'
})

# Data
chronoseq_data = {
    'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', 
             '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
    'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196, 
             2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
    'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145, 
              0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
    'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944, 
             0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
    'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139, 
            1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
    'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639, 
               2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
    'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247, 
                0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483, 
                0.41728783, 0.44272283]
}

qpcr_data = {
    'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', 
             '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
    'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373, 
             4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
    'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329, 
              4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
    'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457, 
             4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
    'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298, 
            3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
    'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566, 
               4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
    'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606, 
                3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
}

# Convert to DataFrames
df_chronoseq = pd.DataFrame(chronoseq_data).set_index('Time')
df_qpcr = pd.DataFrame(qpcr_data).set_index('Time')

# Time points and gene setup
time_points = np.arange(0, 120, 10)
genes = ['ACTB', 'ICAM1', 'IER3', 'IL8', 'NFKBIA', 'TNFRSF9']

# Gene colors and markers
gene_colors = {
    'ACTB': '#1f77b4',    # Blue - housekeeping
    'ICAM1': '#ff7f0e',   # Orange
    'IER3': '#2ca02c',    # Green
    'IL8': '#d62728',     # Red
    'NFKBIA': '#9467bd',  # Purple
    'TNFRSF9': '#8c564b'  # Brown
}

gene_markers = {
    'ACTB': 'o',      # Circle
    'ICAM1': 's',     # Square
    'IER3': '^',      # Triangle up
    'IL8': 'D',       # Diamond
    'NFKBIA': '*',    # Star
    'TNFRSF9': 'p'    # Pentagon
}

def plot_chronoseq_timeseries():
    """Plot ChronoSeq temporal gene expression profiles"""

    fig, ax = plt.subplots(1, 1, figsize=(12, 8))

    # Plot each gene
    for gene in genes:
        expression_values = df_chronoseq[gene].values
        ax.plot(time_points, expression_values, 
               color=gene_colors[gene], marker=gene_markers[gene],
               linewidth=2.5, markersize=8, alpha=0.8,
               label=gene, markeredgecolor='black', markeredgewidth=0.5)

    # Customize plot
    ax.set_xlabel('Time (minutes)', fontsize=14, fontweight='bold')
    ax.set_ylabel('Expression Level (ChronoSeq)', fontsize=14, fontweight='bold')
    ax.set_title('ChronoSeq Pseudobulk Temporal Gene Expression Profiles\nTNF-α Stimulated K562 Cells', 
                fontsize=16, fontweight='bold', pad=20)

    # Set axis properties
    ax.set_xlim(-5, 115)
    ax.set_xticks([0, 30, 60, 90, 110])
    ax.grid(True, alpha=0.3, linestyle='-', linewidth=0.5)
    ax.set_axisbelow(True)

    # Add legend
    ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1), 
             frameon=False, fontsize=11)

    plt.tight_layout()
    plt.savefig('chronoseq_timeseries.svg', format='svg', bbox_inches='tight')
    plt.show()

    return fig, ax

def plot_qpcr_timeseries():
    """Plot qPCR temporal gene expression profiles"""

    fig, ax = plt.subplots(1, 1, figsize=(12, 8))

    # Plot each gene
    for gene in genes:
        expression_values = df_qpcr[gene].values
        ax.plot(time_points, expression_values, 
               color=gene_colors[gene], marker=gene_markers[gene],
               linewidth=2.5, markersize=8, alpha=0.8,
               label=gene, markeredgecolor='black', markeredgewidth=0.5)

    # Customize plot
    ax.set_xlabel('Time (minutes)', fontsize=14, fontweight='bold')
    ax.set_ylabel('Expression Level (100/Cq)', fontsize=14, fontweight='bold')
    ax.set_title('qPCR Temporal Gene Expression Profiles\nTNF-α Stimulated K562 Cells (100/Cq Values)', 
                fontsize=16, fontweight='bold', pad=20)

    # Set axis properties
    ax.set_xlim(-5, 115)
    ax.set_xticks([0, 30, 60, 90, 110])
    ax.grid(True, alpha=0.3, linestyle='-', linewidth=0.5)
    ax.set_axisbelow(True)

    # Add legend
    ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1), 
             frameon=False, fontsize=11)

    plt.tight_layout()
    plt.savefig('qpcr_timeseries.svg', format='svg', bbox_inches='tight')
    plt.show()

    return fig, ax

def analyze_temporal_patterns():
    """Analyze and compare temporal expression patterns"""

    print("Temporal Expression Pattern Analysis")
    print("=" * 50)

    # Calculate expression ranges and dynamics for each gene
    analysis_results = []

    for gene in genes:
        chronoseq_vals = df_chronoseq[gene].values
        qpcr_vals = df_qpcr[gene].values

        # Calculate dynamics
        chronoseq_range = np.max(chronoseq_vals) - np.min(chronoseq_vals)
        chronoseq_fold_change = np.max(chronoseq_vals) / np.min(chronoseq_vals)
        chronoseq_peak_time = time_points[np.argmax(chronoseq_vals)]

        qpcr_range = np.max(qpcr_vals) - np.min(qpcr_vals)
        qpcr_fold_change = np.max(qpcr_vals) / np.min(qpcr_vals)
        qpcr_peak_time = time_points[np.argmax(qpcr_vals)]

        analysis_results.append({
            'Gene': gene,
            'ChronoSeq_Range': chronoseq_range,
            'ChronoSeq_FoldChange': chronoseq_fold_change,
            'ChronoSeq_PeakTime': chronoseq_peak_time,
            'qPCR_Range': qpcr_range,
            'qPCR_FoldChange': qpcr_fold_change,
            'qPCR_PeakTime': qpcr_peak_time
        })

    results_df = pd.DataFrame(analysis_results)
    print(results_df.round(3))

    return results_df

def plot_both_timeseries_comparison():
    """Create side-by-side comparison of both methods"""

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

    # ChronoSeq plot
    for gene in genes:
        expression_values = df_chronoseq[gene].values
        ax1.plot(time_points, expression_values, 
                color=gene_colors[gene], marker=gene_markers[gene],
                linewidth=2.5, markersize=8, alpha=0.8,
                label=gene, markeredgecolor='black', markeredgewidth=0.5)

    ax1.set_xlabel('Time (minutes)', fontsize=14, fontweight='bold')
    ax1.set_ylabel('Expression Level', fontsize=14, fontweight='bold')
    ax1.set_title('A. ChronoSeq scRNA-seq', fontsize=16, fontweight='bold')
    ax1.set_xlim(-5, 115)
    ax1.set_xticks([0, 30, 60, 90, 110])
    ax1.grid(True, alpha=0.3)
    ax1.legend(loc='upper left', frameon=False, fontsize=10)

    # qPCR plot
    for gene in genes:
        expression_values = df_qpcr[gene].values
        ax2.plot(time_points, expression_values, 
                color=gene_colors[gene], marker=gene_markers[gene],
                linewidth=2.5, markersize=8, alpha=0.8,
                label=gene, markeredgecolor='black', markeredgewidth=0.5)

    ax2.set_xlabel('Time (minutes)', fontsize=14, fontweight='bold')
    ax2.set_ylabel('Expression Level (100/Cq)', fontsize=14, fontweight='bold')
    ax2.set_title('B. qPCR', fontsize=16, fontweight='bold')
    ax2.set_xlim(-5, 115)
    ax2.set_xticks([0, 30, 60, 90, 110])
    ax2.grid(True, alpha=0.3)
    ax2.legend(loc='upper left', frameon=False, fontsize=10)

    # Overall title
    fig.suptitle('Temporal Gene Expression Profiles: ChronoSeq vs qPCR\nTNF-α Stimulated K562 Cells', 
                fontsize=18, fontweight='bold', y=0.95)

    plt.tight_layout()
    plt.savefig('both_timeseries_comparison.png', dpi=300, bbox_inches='tight')
    plt.show()

    return fig, (ax1, ax2)

# Execute all functions
if __name__ == "__main__":
    print("Creating ChronoSeq time series plot...")
    fig1, ax1 = plot_chronoseq_timeseries()

    print("\nCreating qPCR time series plot...")
    fig2, ax2 = plot_qpcr_timeseries()

    print("\nAnalyzing temporal patterns...")
    analysis_df = analyze_temporal_patterns()

    print("\nCreating side-by-side comparison plot...")
    fig3, (ax3, ax4) = plot_both_timeseries_comparison()

    print("\nTemporal analysis complete!")
    print("Files saved:")
    print("- chronoseq_timeseries.png")
    print("- qpcr_timeseries.png") 
    print("- both_timeseries_comparison.png")

    # Key biological insights
    print("\n" + "="*60)
    print("KEY BIOLOGICAL INSIGHTS")
    print("="*60)

    print("\nTEMPORAL RESPONSE PATTERNS:")
    print("• ACTB (housekeeping): Stable expression in both methods")
    print("• ICAM1 (adhesion): Gradual upregulation, sustained response")
    print("• IER3 (early response): Rapid peak ~30-40min, then decline")
    print("• IL8 (cytokine): Progressive increase, peak at end")
    print("• NFKBIA (NFκB inhibitor): Classic early peak ~50min")
    print("• TNFRSF9 (TNF receptor): Gradual sustained increase")

    print("\nMETHOD COMPARISON:")
    print("• ChronoSeq captures fine temporal resolution")
    print("• qPCR shows similar patterns with different absolute levels")
    print("• Both methods identify key response phases:")
    print("  - Early response (0-30 min): IER3 activation") 
    print("  - Mid response (30-60 min): NFKBIA peak, ICAM1 rise")
    print("  - Late response (60-110 min): IL8 and TNFRSF9 sustained increase")
Creating ChronoSeq time series plot...
Creating qPCR time series plot...
Analyzing temporal patterns...
Temporal Expression Pattern Analysis
==================================================
      Gene  ChronoSeq_Range  ChronoSeq_FoldChange  ChronoSeq_PeakTime  \
0     ACTB            0.171                 1.060                 110   
1    ICAM1            0.867                11.512                 110   
2     IER3            0.514                11.495                  40   
3      IL8            1.156                13.980                 110   
4   NFKBIA            2.145                 4.740                  50   
5  TNFRSF9            0.439               123.810                 110   

   qPCR_Range  qPCR_FoldChange  qPCR_PeakTime  
0       0.072            1.015             20  
1       1.096            1.291            100  
2       0.675            1.181             40  
3       0.356            1.110            100  
4       0.791            1.208             80  
5       0.826            1.301             90  

Creating side-by-side comparison plot...
Temporal analysis complete!
Files saved:
- chronoseq_timeseries.png
- qpcr_timeseries.png
- both_timeseries_comparison.png

============================================================
KEY BIOLOGICAL INSIGHTS
============================================================

TEMPORAL RESPONSE PATTERNS:
• ACTB (housekeeping): Stable expression in both methods
• ICAM1 (adhesion): Gradual upregulation, sustained response
• IER3 (early response): Rapid peak ~30-40min, then decline
• IL8 (cytokine): Progressive increase, peak at end
• NFKBIA (NFκB inhibitor): Classic early peak ~50min
• TNFRSF9 (TNF receptor): Gradual sustained increase

METHOD COMPARISON:
• ChronoSeq captures fine temporal resolution
• qPCR shows similar patterns with different absolute levels
• Both methods identify key response phases:
  - Early response (0-30 min): IER3 activation
  - Mid response (30-60 min): NFKBIA peak, ICAM1 rise
  - Late response (60-110 min): IL8 and TNFRSF9 sustained increase
In [ ]: